import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Load the soil-moisture logger dataset: timestamp parts (year..second),
# five moisture sensors (moisture0..moisture4) and an irrigation flag.
# 'irrgation' is the CSV's own (misspelled) column name -- kept as-is.
df = pd.read_csv('plant_vase1(2).csv')
df.head()
| year | month | day | hour | minute | second | moisture0 | moisture1 | moisture2 | moisture3 | moisture4 | irrgation | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2020 | 3 | 6 | 22 | 16 | 11 | 0.33 | 0.40 | 0.36 | 0.23 | 0.02 | False |
| 1 | 2020 | 3 | 6 | 22 | 17 | 11 | 0.32 | 0.39 | 0.35 | 0.23 | 0.02 | False |
| 2 | 2020 | 3 | 6 | 22 | 18 | 11 | 0.31 | 0.39 | 0.34 | 0.22 | 0.02 | False |
| 3 | 2020 | 3 | 6 | 22 | 19 | 11 | 0.30 | 0.38 | 0.33 | 0.21 | 0.02 | False |
| 4 | 2020 | 3 | 6 | 22 | 20 | 11 | 0.29 | 0.38 | 0.33 | 0.21 | 0.02 | False |
# Dtypes and non-null counts: 4409 rows, 12 columns, no missing values.
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 4409 entries, 0 to 4408 Data columns (total 12 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 year 4409 non-null int64 1 month 4409 non-null int64 2 day 4409 non-null int64 3 hour 4409 non-null int64 4 minute 4409 non-null int64 5 second 4409 non-null int64 6 moisture0 4409 non-null float64 7 moisture1 4409 non-null float64 8 moisture2 4409 non-null float64 9 moisture3 4409 non-null float64 10 moisture4 4409 non-null float64 11 irrgation 4409 non-null bool dtypes: bool(1), float64(5), int64(6) memory usage: 383.3 KB
# Per-column dtypes (ints for time parts, floats for sensors, bool flag).
df.dtypes
year int64 month int64 day int64 hour int64 minute int64 second int64 moisture0 float64 moisture1 float64 moisture2 float64 moisture3 float64 moisture4 float64 irrgation bool dtype: object
# Column names as a plain Python list.
print(list(df))
['year', 'month', 'day', 'hour', 'minute', 'second', 'moisture0', 'moisture1', 'moisture2', 'moisture3', 'moisture4', 'irrgation']
# Exploratory regression plots of moisture4 against the time fields and
# the other moisture sensors.  The original cell copy-pasted the same
# lmplot/plt.show() pair eight times and had two stray extra plt.show()
# calls (no-ops with no open figure); a loop over (column, palette)
# pairs produces the identical sequence of figures.
# NOTE(review): hue="moisture4" maps the continuous *target* onto colour,
# so lmplot fits one regression per unique moisture4 value -- probably
# only the colour-coding of points was intended; confirm before changing.
for x_col, palette in [
    ("day", "rocket"),
    ("hour", "icefire"),
    ("minute", "coolwarm"),
    ("hour", "cubehelix"),  # second plot of "hour", kept to match the original cell
    ("moisture0", "YlOrBr"),
    ("moisture1", "Blues"),
    ("moisture2", "viridis"),
    ("moisture3", "rocket_r"),
]:
    sns.lmplot(x=x_col, y="moisture4", hue="moisture4", palette=palette, data=df)
    plt.show()
# Drop the irrigation flag plus year/month before modelling
# (the head above shows year=2020, month=3 -- presumably constant
# for the whole log; verify if the dataset spans months).
# 'irrgation' is the CSV's own spelling.
df.drop(['irrgation','year','month'],axis=1,inplace=True)
df.head()
| day | hour | minute | second | moisture0 | moisture1 | moisture2 | moisture3 | moisture4 | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 6 | 22 | 16 | 11 | 0.33 | 0.40 | 0.36 | 0.23 | 0.02 |
| 1 | 6 | 22 | 17 | 11 | 0.32 | 0.39 | 0.35 | 0.23 | 0.02 |
| 2 | 6 | 22 | 18 | 11 | 0.31 | 0.39 | 0.34 | 0.22 | 0.02 |
| 3 | 6 | 22 | 19 | 11 | 0.30 | 0.38 | 0.33 | 0.21 | 0.02 |
| 4 | 6 | 22 | 20 | 11 | 0.29 | 0.38 | 0.33 | 0.21 | 0.02 |
# Confirm no missing values remain after the drop.
df.isnull().sum()
day 0 hour 0 minute 0 second 0 moisture0 0 moisture1 0 moisture2 0 moisture3 0 moisture4 0 dtype: int64
# Line plot of the first four moisture sensors over the row index.
df[['moisture0', 'moisture1','moisture2', 'moisture3']].plot()
<AxesSubplot:>
# Correlation of every column after the first ('day') with moisture3.
df[df.columns[1:]].corr()['moisture3'][:]
hour 0.129913 minute -0.025485 second 0.021293 moisture0 0.177459 moisture1 0.068022 moisture2 0.023048 moisture3 1.000000 moisture4 0.568807 Name: moisture3, dtype: float64
# Full pairwise correlation matrix, styled with a diverging gradient.
corr = df.corr()
corr.style.background_gradient(cmap='coolwarm')
| day | hour | minute | second | moisture0 | moisture1 | moisture2 | moisture3 | moisture4 | |
|---|---|---|---|---|---|---|---|---|---|
| day | 1.000000 | -0.089261 | -0.015033 | 0.141041 | -0.619495 | -0.351188 | 0.382697 | -0.050850 | 0.069838 |
| hour | -0.089261 | 1.000000 | 0.000517 | -0.200921 | 0.076410 | 0.071533 | 0.323242 | 0.129913 | -0.146300 |
| minute | -0.015033 | 0.000517 | 1.000000 | 0.002732 | -0.004522 | -0.001688 | -0.005108 | -0.025485 | -0.014328 |
| second | 0.141041 | -0.200921 | 0.002732 | 1.000000 | -0.088478 | 0.075844 | 0.119998 | 0.021293 | 0.227909 |
| moisture0 | -0.619495 | 0.076410 | -0.004522 | -0.088478 | 1.000000 | 0.015492 | 0.221128 | 0.177459 | 0.071691 |
| moisture1 | -0.351188 | 0.071533 | -0.001688 | 0.075844 | 0.015492 | 1.000000 | -0.188187 | 0.068022 | 0.240432 |
| moisture2 | 0.382697 | 0.323242 | -0.005108 | 0.119998 | 0.221128 | -0.188187 | 1.000000 | 0.023048 | -0.069030 |
| moisture3 | -0.050850 | 0.129913 | -0.025485 | 0.021293 | 0.177459 | 0.068022 | 0.023048 | 1.000000 | 0.568807 |
| moisture4 | 0.069838 | -0.146300 | -0.014328 | 0.227909 | 0.071691 | 0.240432 | -0.069030 | 0.568807 | 1.000000 |
# Feature matrix: all columns except the prediction target moisture4.
X = df.drop('moisture4',axis=1)
X.head()
| day | hour | minute | second | moisture0 | moisture1 | moisture2 | moisture3 | |
|---|---|---|---|---|---|---|---|---|
| 0 | 6 | 22 | 16 | 11 | 0.33 | 0.40 | 0.36 | 0.23 |
| 1 | 6 | 22 | 17 | 11 | 0.32 | 0.39 | 0.35 | 0.23 |
| 2 | 6 | 22 | 18 | 11 | 0.31 | 0.39 | 0.34 | 0.22 |
| 3 | 6 | 22 | 19 | 11 | 0.30 | 0.38 | 0.33 | 0.21 |
| 4 | 6 | 22 | 20 | 11 | 0.29 | 0.38 | 0.33 | 0.21 |
# Target vector: the fifth moisture sensor.
y = df['moisture4']
y.head()
0 0.02 1 0.02 2 0.02 3 0.02 4 0.02 Name: moisture4, dtype: float64
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
# Standardise every feature to zero mean / unit variance.
# NOTE(review): the scaler is fit on the FULL dataset before the
# train/test split below -- a mild data-leakage; fitting on X_train
# only (then transforming X_test) would be the cleaner protocol.
sc = StandardScaler()
X = sc.fit_transform(X)
X
array([[-2.26404799, 1.46064442, -0.78308591, ..., -0.07531016,
-1.8633435 , 5.63158327],
[-2.26404799, 1.46064442, -0.72519383, ..., -0.12750169,
-2.02247191, 5.63158327],
[-2.26404799, 1.46064442, -0.66730175, ..., -0.12750169,
-2.18160032, 5.09799096],
...,
[ 1.21892047, 1.60288272, 0.89578442, ..., -0.12750169,
0.68271104, -0.23793206],
[ 1.21892047, 1.60288272, 0.9536765 , ..., -0.12750169,
0.68271104, -0.23793206],
[ 1.21892047, 1.60288272, 1.01156858, ..., -0.12750169,
0.68271104, -0.23793206]])
from sklearn import metrics  # NOTE(review): duplicate -- metrics was already imported above
# 4409 samples x 8 features after scaling.
X.shape
(4409, 8)
from sklearn.model_selection import train_test_split
# 70/30 hold-out split with a fixed seed for reproducibility.
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=42)
X_train
array([[ 0.05793098, -1.52635987, -1.24622255, ..., 0.23783901,
0.36445422, 0.29566024],
[ 0.05793098, 1.31840612, -0.55151759, ..., -0.75380005,
0.84183945, 2.43002945],
[-1.10305851, -0.67293007, -0.26205719, ..., 2.74303244,
-1.06770146, -0.23793206],
...,
[ 1.21892047, -1.52635987, 1.18524482, ..., -0.91037464,
0.52358263, -1.30511667],
[ 1.21892047, 0.18049972, -1.07254631, ..., -0.12750169,
0.04619741, 0.82925254],
[-1.10305851, 0.03826142, 0.3747557 , ..., 0.9163289 ,
0.36445422, -0.77152437]])
# Held-out targets (1323 rows per the output below).
y_test
2983 0.02
1499 0.02
4112 0.03
478 0.06
530 0.04
...
3565 0.03
1580 0.03
2989 0.03
839 0.03
500 0.04
Name: moisture4, Length: 1323, dtype: float64
def predict(algorithm):
    """Fit *algorithm*, report train/test scores and error metrics, and
    plot the residual distribution.

    Parameters
    ----------
    algorithm : unfitted sklearn-style regressor (fit/score/predict API).

    Uses the module-level globals X_train, X_test, y_train, y_test and
    the imported ``metrics`` module.  Note that for regressors
    ``.score()`` returns R^2, so the "Accuracy" label printed below is
    really an R^2 value (the labels are kept for output compatibility).
    """
    model = algorithm.fit(X_train,y_train)
    print('Training Score: {}'.format(model.score(X_train,y_train)))
    print('Test Accuracy: {}'.format(model.score(X_test, y_test)))
    preds = model.predict(X_test)
    print('Predictions are: {}'.format(preds))
    print('\n')
    r2_score = metrics.r2_score(y_test,preds)
    print('r2_score is:{}'.format(r2_score))
    print('MAE:',metrics.mean_absolute_error(y_test,preds))
    print('MSE:',metrics.mean_squared_error(y_test,preds))
    print('RMSE:',np.sqrt(metrics.mean_squared_error(y_test,preds)))
    # Fix: sns.distplot is deprecated (every call above emitted a
    # FutureWarning); histplot with a KDE overlay is the documented
    # replacement for the default distplot rendering.
    sns.histplot(y_test-preds, color='red', kde=True)
from sklearn.metrics import accuracy_score as score  # NOTE(review): unused -- accuracy is a classification metric, never called in this file
from sklearn.linear_model import LinearRegression
# Baseline model: ordinary least squares.
predict(LinearRegression())
Training Score: 0.5041848832473712 Test Accuracy: 0.5326493426777246 Predictions are: [0.03098928 0.02595137 0.02978913 ... 0.05211637 0.0283853 0.03645733] r2_score is:0.5326493426777246 MAE: 0.0043283793066388205 MSE: 3.804635379579165e-05 RMSE: 0.0061681726463995515
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning)
# Refit a plain linear regression and visualise predicted vs. true
# moisture with an interactive plotly scatter.
ln_model = LinearRegression().fit(X_train, y_train)
preds1 = ln_model.predict(X_test)
preds1
import plotly.express as px
axis_labels = {'x': 'True Moisture', 'y': 'Predicted Moisture'}
fig = px.scatter(x=y_test, y=preds1, labels=axis_labels)
# Dashed y = x reference: points on this line are perfect predictions.
lo = y.min()
hi = y.max()
fig.add_shape(type="line", line=dict(dash='dash'), x0=lo, y0=lo, x1=hi, y1=hi)
fig.show()
from sklearn.ensemble import RandomForestRegressor
# Bagged tree ensemble with default hyper-parameters.
predict(RandomForestRegressor())
Training Score: 0.9810927455058502 Test Accuracy: 0.8850775140744942 Predictions are: [0.0254 0.0202 0.03 ... 0.0573 0.0271 0.04 ] r2_score is:0.8850775140744942 MAE: 0.0013712773998488434 MSE: 9.355676492819348e-06 RMSE: 0.003058705035275443
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
rf = RandomForestRegressor()
rf.fit(X_train, y_train)
preds2 = rf.predict(X_test)
preds2
array([0.0248, 0.0201, 0.03 , ..., 0.0521, 0.0274, 0.04 ])
import plotly.express as px
fig = px.scatter(x=y_test, y=preds2, labels={'x': 'True Moisture', 'y': 'Predicted Moisture'})
fig.add_shape(
type="line", line=dict(dash='dash'),
x0=y.min(), y0=y.min(),
x1=y.max(), y1=y.max()
)
fig.show()
from sklearn.neighbors import KNeighborsRegressor
# k-nearest-neighbours regression (default k=5).
predict(KNeighborsRegressor())
Training Score: 0.902437059019282 Test Accuracy: 0.8537095608308689 Predictions are: [0.024 0.024 0.03 ... 0.036 0.028 0.04 ] r2_score is:0.8537095608308689 MAE: 0.0016190476190476193 MSE: 1.1909297052154195e-05 RMSE: 0.0034509849394273218
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
knn = KNeighborsRegressor()
knn.fit(X_train, y_train)
preds3 = knn.predict(X_test)
preds3
array([0.024, 0.024, 0.03 , ..., 0.036, 0.028, 0.04 ])
import plotly.express as px
fig = px.scatter(x=y_test, y=preds3, labels={'x': 'True Moisture', 'y': 'Predicted Moisture'})
fig.add_shape(
type="line", line=dict(dash='dash'),
x0=y.min(), y0=y.min(),
x1=y.max(), y1=y.max()
)
fig.show()
from sklearn.tree import DecisionTreeRegressor
# Single decision tree; unconstrained depth overfits (train score 1.0 below).
predict(DecisionTreeRegressor())
Training Score: 1.0 Test Accuracy: 0.8356600169272899 Predictions are: [0.02 0.02 0.03 ... 0.04 0.03 0.04] r2_score is:0.8356600169272899 MAE: 0.0012471655328798272 MSE: 1.3378684807256232e-05 RMSE: 0.0036576884513660033
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
dt = DecisionTreeRegressor()
dt.fit(X_train, y_train)
preds4= dt.predict(X_test)
preds4
array([0.02, 0.02, 0.03, ..., 0.06, 0.03, 0.04])
import plotly.express as px
fig = px.scatter(x=y_test, y=preds4, labels={'x': 'True Moisture', 'y': 'Predicted Moisture'})
fig.add_shape(
type="line", line=dict(dash='dash'),
x0=y.min(), y0=y.min(),
x1=y.max(), y1=y.max()
)
fig.show()
dt = KNeighborsRegressor()
dt.fit(X_train, y_train)
preds4 = dt.predict(X_test)
preds4
array([0.024, 0.024, 0.03 , ..., 0.036, 0.028, 0.04 ])
import plotly.express as px
fig = px.scatter(x=y_test, y=preds4, labels={'x': 'True Moisture', 'y': 'Predicted Moisture'})
fig.add_shape(
type="line", line=dict(dash='dash'),
x0=y.min(), y0=y.min(),
x1=y.max(), y1=y.max()
)
fig.show()
from xgboost.sklearn import XGBRegressor
# Gradient-boosted trees with default hyper-parameters.
predict( XGBRegressor())
Training Score: 0.958998725106501 Test Accuracy: 0.8894250637732644 Predictions are: [0.02488854 0.0192304 0.03029314 ... 0.06017064 0.02752487 0.04212941] r2_score is:0.8894250637732644 MAE: 0.0015555962965241544 MSE: 9.00174864144556e-06 RMSE: 0.003000291426086066
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
xgb = XGBRegressor()
xgb.fit(X_train, y_train)
preds5 = xgb.predict(X_test)
preds5
array([0.02488854, 0.0192304 , 0.03029314, ..., 0.06017064, 0.02752487,
0.04212941], dtype=float32)
import plotly.express as px
fig = px.scatter(x=y_test, y=preds5, labels={'x': 'True Moisture', 'y': 'Predicted Moisture'})
fig.add_shape(
type="line", line=dict(dash='dash'),
x0=y.min(), y0=y.min(),
x1=y.max(), y1=y.max()
)
fig.show()
The history saving thread hit an unexpected error (OperationalError('database is locked')).History will not be written to the database.
from sklearn.model_selection import RandomizedSearchCV
# Random-forest search space for randomized hyper-parameter tuning.
# NOTE(review): max_features='auto' is deprecated/removed for regressors
# in newer scikit-learn releases -- verify against the installed version.
param_random = {
'bootstrap': [True],
'max_depth': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, None],
'max_features': ['auto', 'sqrt'],
'min_samples_leaf': [1, 2, 4],
'min_samples_split': [2, 5, 10],
'n_estimators': [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000]
}
param_random
{'bootstrap': [True],
'max_depth': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, None],
'max_features': ['auto', 'sqrt'],
'min_samples_leaf': [1, 2, 4],
'min_samples_split': [2, 5, 10],
'n_estimators': [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000]}
# 100 random draws from the space above, 5-fold CV, all CPU cores.
rf = RandomForestRegressor()
rf_random = RandomizedSearchCV(estimator = rf, param_distributions = param_random, n_iter = 100, cv = 5, verbose=2, random_state=42, n_jobs = -1)
rf_random.fit(X_train,y_train)
Fitting 5 folds for each of 100 candidates, totalling 500 fits
RandomizedSearchCV(cv=5, estimator=RandomForestRegressor(), n_iter=100,
n_jobs=-1,
param_distributions={'bootstrap': [True],
'max_depth': [10, 20, 30, 40, 50, 60,
70, 80, 90, 100, None],
'max_features': ['auto', 'sqrt'],
'min_samples_leaf': [1, 2, 4],
'min_samples_split': [2, 5, 10],
'n_estimators': [200, 400, 600, 800,
1000, 1200, 1400, 1600,
1800, 2000]},
random_state=42, verbose=2)
# Best hyper-parameter combination found by the randomized search.
rf_random.best_params_
{'n_estimators': 600,
'min_samples_split': 5,
'min_samples_leaf': 1,
'max_features': 'sqrt',
'max_depth': 30,
'bootstrap': True}
# Predict with the search's refitted best estimator.
preds6 = rf_random.predict(X_test)
preds6
array([0.02584873, 0.02040746, 0.0300475 , ..., 0.04496659, 0.02776196,
0.04015556])
# R^2 on train and test (printed as "Accuracy"); tuning lifts test R^2
# to ~0.90 per the output below.
print("Train Accuracy:", rf_random.score(X_train, y_train))
print("Test Accuracy:" , rf_random.score(X_test, y_test))
Train Accuracy: 0.9562477853265423 Test Accuracy: 0.9016359848893354
# Predicted vs. true moisture for the tuned random forest.
import plotly.express as px
fig = px.scatter(x=y_test, y=preds6, labels={'x': 'True Moisture', 'y': 'Predicted Moisture'})
# Dashed y = x reference line.
fig.add_shape(
type="line", line=dict(dash='dash'),
x0=y.min(), y0=y.min(),
x1=y.max(), y1=y.max()
)
fig.show()
from xgboost import XGBRegressor
def hyperParameterTuning(X_train, y_train):
    """Grid-search XGBRegressor hyper-parameters with 5-fold CV.

    Parameters
    ----------
    X_train, y_train : training features and target.

    Returns
    -------
    dict
        The best parameter combination found by the search.
    """
    # Fix: GridSearchCV was never imported anywhere in this file, so the
    # original body raised NameError when called.
    from sklearn.model_selection import GridSearchCV
    param_tuning = {
        'learning_rate': [0.01, 0.1],
        'max_depth': [3, 5, 7, 10],
        'min_child_weight': [1, 3, 5],
        'subsample': [0.5, 0.7],
        'colsample_bytree': [0.5, 0.7],
        'n_estimators' : [100, 200, 500],
        'objective': ['reg:squarederror']
    }
    XGB = XGBRegressor()
    gridsearch = GridSearchCV(estimator = XGB,
                              param_grid = param_tuning,
                              cv = 5,
                              n_jobs = -1,
                              verbose = 1)
    gridsearch.fit(X_train,y_train)
    # Fix: the fitted attribute has a trailing underscore; `best_params`
    # raised AttributeError.
    return gridsearch.best_params_
# Manually-chosen XGBoost configuration, trained with early stopping.
XGB_grid = XGBRegressor(
objective = 'reg:squarederror',
colsample_bytree = 0.5,
learning_rate = 0.05,
max_depth = 6,
min_child_weight = 1,
n_estimators = 1000,
subsample = 0.7)
# NOTE(review): passing early_stopping_rounds to fit() is deprecated in
# newer xgboost (it moved to the constructor) -- confirm the installed
# version.  Also, early-stopping on (X_test, y_test) lets the test set
# influence training, so the test score below is slightly optimistic.
XGB_grid.fit(X_train, y_train, early_stopping_rounds=5, eval_set=[(X_test, y_test)], verbose=False)
XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
colsample_bynode=1, colsample_bytree=0.5, enable_categorical=False,
gamma=0, gpu_id=-1, importance_type=None,
interaction_constraints='', learning_rate=0.05, max_delta_step=0,
max_depth=6, min_child_weight=1, missing=nan,
monotone_constraints='()', n_estimators=1000, n_jobs=8,
num_parallel_tree=1, predictor='auto', random_state=0, reg_alpha=0,
reg_lambda=1, scale_pos_weight=1, subsample=0.7,
tree_method='exact', validate_parameters=1, verbosity=None)
# Predictions from the manually-tuned XGBoost model.
preds8 = XGB_grid.predict(X_test)
preds8
array([0.02459604, 0.0198236 , 0.02997729, ..., 0.04995943, 0.02784717,
0.0398635 ], dtype=float32)
# R^2 on the training set (printed as "Accuracy").
print("Train Accuracy:", XGB_grid.score(X_train, y_train))
Train Accuracy: 0.9450647698352641
# R^2 on the held-out set (printed as "Accuracy").
print("Test Accuracy:" , XGB_grid.score(X_test, y_test))
Test Accuracy: 0.9073673913666416
# Predicted vs. true moisture for the tuned XGBoost model.
import plotly.express as px
fig = px.scatter(x=y_test, y=preds8, labels={'x': 'True Moisture', 'y': 'Predicted Moisture'})
# Dashed y = x reference line.
fig.add_shape(
type="line", line=dict(dash='dash'),
x0=y.min(), y0=y.min(),
x1=y.max(), y1=y.max()
)
fig.show()